library(ggplot2)
library(rmarkdown)
suppressWarnings(library(dplyr))
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

Loading Results

Instance Statistics

# Load the per-instance statistics. The CSV has no header row, so column
# names are assigned explicitly.
instances <- read.csv("instances.csv", header = FALSE)
colnames(instances) <- c("tag", "instance", "mps", "n_vars", "n_ctrs")

# Remove tag
instances$tag <- NULL

# Clean instance name: keep the file name up to (excluding) the first "_"
instances$instance <- sub("_.*", "", basename(instances$instance))
instances$mps <- basename(instances$mps)

# Add 'var_cat' based on the # Variables intervals.
# The last break is Inf rather than a fixed maximum: with a finite upper
# break, cut() returns NA for any instance larger than that value.
instances$var_cat <- cut(
  instances$n_vars,
  breaks = c(-Inf, 534, 2167, Inf),
  labels = c("small", "medium", "large"),
  right = TRUE
)

# Add 'ctr_cat' based on the # Constraints intervals (same open-ended top bin)
instances$ctr_cat <- cut(
  instances$n_ctrs,
  breaks = c(-Inf, 351, 906, Inf),
  labels = c("small", "medium", "large"),
  right = TRUE
)

# Keep the first row for each cleaned instance name
instances <- instances[!duplicated(instances$instance), ]

paged_table(instances)

Results

# Threshold compared against `total_time` to decide whether a run counts as
# solved (same unit as the `time` column).
time_limit <- 1800

# Load the raw results. The CSV has no header row, so column names are
# assigned explicitly.
data <- read.csv("results.csv", header = FALSE)
stats <- c(
  "time", "status", "reason", "objective", "n_outer_iterations",
  "n_inner_iterations", "l1_norm", "l2_norm"
)
colnames(data) <- c(
  "tag", "instance", "n_vars", "n_ctrs", "desired_space_dim",
  "n_mutable_coefficients", "n_mutable_costs", "n_mutable_rhs", "method",
  "update_rule", "update_rule_parameter", "norm", "initial_penalty",
  "warm_start", "sol_file",
  paste0("warm_start_", stats),  # same statistic names, "warm_start_" prefix
  stats,
  "solution_ok", "unconstrained_obj", "constrained_obj", "gap"
)

# Remove tag
data$tag <- NULL

# Add a single-string solver description built from the configuration columns
data$solver <- paste0(
  data$method, " - ", data$update_rule, " ", data$update_rule_parameter,
  " - ", data$norm, " - init ", data$initial_penalty,
  " - warm start ", data$warm_start
)

# Add 'n_mutable_columns' from the trailing "_<digits>" of the instance name.
# gsub() already returns character, so it is converted to numeric directly;
# names without that suffix become NA (with a coercion warning).
data$n_mutable_columns <- as.numeric(
  gsub(".*_(\\d+)$", "\\1", data$instance)
)

# Clean instance name
data$full_instance <- basename(data$instance)
data$instance <- sub("_.*", "", data$full_instance)

# Total time = warm-start time (0 when missing) + main time
data$total_time <- ifelse(is.na(data$warm_start_time), 0, data$warm_start_time) +
  data$time

# A run is solved when it is reported "Feasible" within the time limit.
# `%in%` and the is.na() guard keep `solved` strictly TRUE/FALSE: a missing
# status or time counts as unsolved instead of producing NA, which would
# break the count and the assignment below.
data$solved <- data$status %in% "Feasible" &
  !is.na(data$total_time) &
  data$total_time < time_limit

n_unsolved <- sum(!data$solved)

# Unsolved runs are charged the full time limit (a no-op when there are none)
data$total_time[!data$solved] <- time_limit

Merge Instances and Results

# Attach the size categories of each instance to its result rows. Result rows
# without a matching instance are kept (all.x = TRUE) with NA categories.
data <- merge(
  x = data,
  y = instances[c("instance", "var_cat", "ctr_cat")],
  by = "instance",
  all.x = TRUE
)

Sanity Check

# Number of result rows
n_instances <- nrow(data)

# Flag solved rows whose (non-missing) gap is at least 1e-2
filter <- with(data, solved & !is.na(gap) & gap >= 1e-2)
n_fail <- sum(filter)

cat("\nThere are ", n_instances, " instances.\n", sep = "")

There are 51839 instances.

# When some rows were flagged, print a warning line and show those rows
if (n_fail > 0) {
  cat("\n**WARNING**: There were ", n_fail, " failed instances.\n", sep = "")

  paged_table(data[filter, ])
}
WARNING: There were 2059 failed instances.

Instances Summary à la Kurtz

# Restrict to a single solver configuration
sub_data <- data %>%
  filter(
    method == "PADM",
    update_rule == "adapt",
    norm == "l1",
    initial_penalty == 5e2,
    warm_start == 1
  )

# Group, summarize, and sort by n_mutable_columns.
# `.groups = "drop"` returns an ungrouped result directly (as the bar-plot
# summary does) and avoids the "summarise() has grouped output" message.
summary_table <- sub_data %>%
  group_by(var_cat, ctr_cat, n_mutable_columns) %>%
  summarise(
    `# inst.` = n_distinct(instance),  # Number of distinct instances
    `feasible (in %)` = mean(solved) * 100,  # Share of solved rows, in percent
    `# mutable objective param.` = mean(n_mutable_costs, na.rm = TRUE),  # Avg mutable objective parameters
    `# mutable constraint param.` = mean(n_mutable_coefficients, na.rm = TRUE),  # Avg mutable constraint parameters
    .groups = "drop"
  ) %>%
  arrange(var_cat, ctr_cat, n_mutable_columns)  # Sort by var_cat, ctr_cat, and n_mutable_columns
## `summarise()` has grouped output by 'var_cat', 'ctr_cat'. You can override
## using the `.groups` argument.
# Render the summary as an HTML table with kableExtra. The table's own column
# names are suppressed (col.names = NULL) and replaced by a one-row header in
# which every label spans a single column. The rows are then labelled in three
# fixed ranges: 1-6, 7-15 and 16-24.
summary_table %>%
  kable(format = "html", align = "c", col.names = NULL) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  ) %>%
  add_header_above(
    setNames(
      rep(1, 7),
      c(
        "n", "m", "# mut. columns", "# inst.", "feasible (in %)",
        "# mutable objective param.", "# mutable constraint param."
      )
    )
  ) %>%
  group_rows(group_label = "small", start_row = 1, end_row = 6) %>%
  group_rows(group_label = "medium", start_row = 7, end_row = 15) %>%
  group_rows(group_label = "large", start_row = 16, end_row = 24)
n
m
# mut. columns
# inst.
feasible (in %)
# mutable objective param.
# mutable constraint param.
small
small small 1 28 32.32143 0.3017857 4.376786
small small 5 28 50.00000 1.5535714 21.175000
small small 10 28 57.50000 4.0607143 54.962500
small medium 1 6 23.33333 0.7500000 5.216667
small medium 5 6 54.16667 3.9166667 33.758333
small medium 10 6 63.33333 10.4500000 85.125000
medium
medium small 1 4 51.25000 0.6125000 5.925000
medium small 5 4 80.00000 2.5875000 22.575000
medium small 10 4 90.00000 6.7500000 54.125000
medium medium 1 21 37.85714 0.4500000 10.916667
medium medium 5 21 47.61905 2.3119048 19.916667
medium medium 10 21 52.61905 6.0190476 37.257143
medium large 1 8 32.50000 0.5312500 2.850000
medium large 5 8 50.00000 2.0500000 10.675000
medium large 10 8 59.37500 5.4500000 26.206250
large
large small 1 2 25.00000 0.4250000 12.425000
large small 5 2 47.50000 0.9250000 74.125000
large small 10 2 50.00000 2.0250000 199.400000
large medium 1 6 26.66667 0.5333333 2.250000
large medium 5 6 35.83333 2.7583333 8.975000
large medium 10 6 44.16667 7.3250000 22.016667
large large 1 21 45.71429 0.5500000 5.126190
large large 5 21 53.33333 2.3690476 20.792857
large large 10 21 55.71429 6.0666667 53.085714
# Count the solved rows of `sub_data` for every (instance, n_mutable_columns)
# pair; unsolved rows are discarded first.
bar_data <- sub_data %>%
  filter(solved) %>%
  group_by(instance, n_mutable_columns) %>%
  summarise(solved_count = n(), .groups = "drop")

# Dodged bars, one per n_mutable_columns value, with instances on the
# flipped axis.
bar_data %>%
  ggplot(
    aes(x = instance, y = solved_count, fill = factor(n_mutable_columns))
  ) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(
    title = "Number of Solved Instances by n_mutable_columns for Each Instance",
    x = "Instance",
    y = "Number of Solved Instances",
    fill = "n_mutable_columns"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),  # rotated tick labels
    legend.position = "top"
  )

Computational Time Analysis

# One plot per distinct value of `norm`: ECDF of total time, one step curve
# per solver configuration.
for (norm in unique(data$norm)) {

  sub_data <- data[data[["norm"]] == norm, ]

  # Tick marks every 60 time units up to the largest observed total time
  time_breaks <- seq(from = 0, to = max(sub_data$total_time), by = 60)

  plot <- ggplot(
    data = sub_data,
    mapping = aes(x = total_time, group = solver, colour = solver)
  ) +
    stat_ecdf(geom = "step") +
    scale_x_continuous(breaks = time_breaks, limits = c(0, time_limit)) +
    scale_y_continuous(breaks = seq(0, 1, by = 0.1)) +
    labs(
      title = paste0("ECDF of Time for Each Solver using ", norm, " norm"),
      x = "Time",
      y = "ECDF",
      colour = "Solver"
    ) +
    theme_minimal() +
    theme(legend.position = "bottom")

  print(plot)
}

# One plot per distinct value of `norm`: ECDF of total time per solver
# configuration, split into one panel per n_mutable_columns value.
for (norm in unique(data$norm)) {

  sub_data <- data[data[["norm"]] == norm, ]

  # Coarser ticks than a single-panel plot would need: every 180 time units
  # on x and every 0.25 on y
  time_breaks <- seq(from = 0, to = max(sub_data$total_time), by = 180)

  plot <- ggplot(
    data = sub_data,
    mapping = aes(x = total_time, group = solver, colour = solver)
  ) +
    stat_ecdf(geom = "step") +
    facet_wrap(~ n_mutable_columns, ncol = 3) +  # 3 panels per row
    scale_x_continuous(breaks = time_breaks, limits = c(0, time_limit)) +
    scale_y_continuous(breaks = seq(0, 1, by = 0.25)) +
    labs(
      title = paste0("ECDF of Time for Each Solver using ", norm, " norm"),
      x = "Time",
      y = "ECDF",
      colour = "Solver"
    ) +
    theme_minimal() +
    theme(legend.position = "bottom")

  print(plot)
}

What Can We Solve?

# Keep the rows run with the "adapt" update rule, the "l1" norm and an
# initial penalty of 5e2
sub_data <- data[
  with(data, update_rule == "adapt" & norm == "l1" & initial_penalty == 5e2),
]

# Scatter of instance size (variables vs. constraints, both on log10 axes).
# Each point is coloured by its solved flag: 0 (red) for unsolved, 1 (green)
# for solved.
ggplot(
  data = sub_data,
  mapping = aes(x = n_vars, y = n_ctrs, colour = as.numeric(solved))
) +
  geom_point(size = 1, alpha = .5) +  # small, semi-transparent points
  scale_x_log10() +
  scale_y_log10() +
  scale_colour_gradient(low = "red", high = "green") +
  labs(
    title = "Scatter Plot of (n_vars, n_ctrs) with Log Scale and Transparency",
    x = "Number of Variables (n_vars)",
    y = "Number of Constraints (n_ctrs)",
    colour = "Solved"
  ) +
  theme_minimal()

Solution Analysis

# One plot per distinct value of `norm`: ECDF of the `l1_norm` column on a
# log10 axis, with one curve per `warm_start` value.
for (norm in unique(data$norm)) {
  # which() discards rows whose condition is NA (e.g. a missing `time`);
  # plain logical indexing would turn each of them into an all-NA row.
  sub_data <- data[which(data$norm == norm & data$time < time_limit), ]

  # A log axis cannot show 0: scale_x_log10() maps it to -Inf and stat_ecdf()
  # then drops the row, so exact zeros would vanish from the distribution.
  # Zeros are instead drawn one decade below the smallest positive value
  # (or at 1 when there is no positive value at all). Missing values stay
  # missing and are still removed by ggplot.
  positive_l1 <- sub_data$l1_norm[which(sub_data$l1_norm > 0)]
  zero_floor <- if (length(positive_l1) > 0) min(positive_l1) / 10 else 1

  plot <- ggplot(
    sub_data,
    aes(
      x = pmax(l1_norm, zero_floor),
      group = warm_start,
      color = as.character(warm_start)
    )
  ) +
    stat_ecdf(geom = "step") +
    labs(
      title = paste0("ECDF of l1-norm of CE when using the objective function: ", norm),
      x = "l1-norm of CE",
      y = "ECDF",
      color = "Warm start",  # replaces the raw expression used as legend title
      caption = paste0("Zero values are drawn at ", signif(zero_floor, 3))
    ) +
    scale_x_log10() +
    theme_minimal() +
    theme(legend.position = "bottom")

  print(plot)
}
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 1794 rows containing non-finite outside the scale range
## (`stat_ecdf()`).

## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 5755 rows containing non-finite outside the scale range
## (`stat_ecdf()`).

## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 6002 rows containing non-finite outside the scale range
## (`stat_ecdf()`).

## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 6339 rows containing non-finite outside the scale range
## (`stat_ecdf()`).

## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 6325 rows containing non-finite outside the scale range
## (`stat_ecdf()`).